library(dplyr)
library(lme4)
library(moments)
library(modelsummary)
library(broom.mixed)

data<-readRDS("table5_data.rds")


services <- c("Waste_treatment", "Waste_collection", "Transport","Fire",  "Libraries",
  "Civil_protection",  "Drinking_water", "Sewer")
 


# Updated transformation function to include density
transform_skewed_data <- function(data) {
  # Function to handle skewness transformation
  transform_variable <- function(x) {
    skew <- skewness(x, na.rm = TRUE)
    
    if (abs(skew) > 1) {
      if (skew > 0) {
        # For positive skew, try log transformation if all values are positive
        if (min(x, na.rm = TRUE) > 0) {
          x_transformed <- log(x)
        } else {
          # If there are zero or negative values, shift the data
          x_shifted <- x - min(x, na.rm = TRUE) + 1
          x_transformed <- log(x_shifted)
        }
      } else {
        # For negative skew, try exponential transformation
        x_transformed <- exp(scale(x))
      }
      return(as.vector(scale(x_transformed)))
    } else {
      return(as.vector(scale(x)))
    }
  }
  
  # Apply transformations and scaling - DENSITY REPLACES POPULATION
  data %>%
    mutate(
      VOTANTS_PERCENT = transform_variable(VOTANTS_PERCENT),
      EUROS_HABITANT = transform_variable(EUROS_HABITANT),
      Densitat..hab..km.. = transform_variable(Densitat..hab..km..),  # Density variable
      year = factor(year)
    )
}

# Density model (density replaces population and population squared)
safe_glmer_density <- function(service_name, data) {
  service_data <- data %>% filter(service == service_name)
  
  tryCatch({
    model <- glmer(imc_dummy ~ Densitat..hab..km.. + VOTANTS_PERCENT + EUROS_HABITANT + 
                     (1 | year) + (1 | muni),
                   data = service_data,
                   family = binomial,
                   control = glmerControl(
                     optimizer = "bobyqa",
                     optCtrl = list(maxfun = 2e4),
                     check.conv.grad = .makeCC("warning", 0.2),
                     check.conv.singular = .makeCC(action = "warning", tol = 1e-4)
                   ))
    
    if(isSingular(model)) {
      cat("Singular fit for", service_name, "\n")
    }
    
    return(model)
  }, error = function(e) {
    cat("Error in", service_name, "model:\n")
    print(e)
    return(NULL)
  })
}

# Function to calculate VIF for mixed-effects models
calculate_vif_glmer <- function(model, service_name) {
  tryCatch({
    # Extract fixed effects design matrix
    X <- model.matrix(model, type = "fixed")
    # Remove intercept column
    X <- X[, -1, drop = FALSE]
    
    # Calculate VIF using correlation matrix approach
    if (ncol(X) > 1) {
      # Calculate correlation matrix
      cor_matrix <- cor(X)
      # Calculate VIF as diagonal of inverse correlation matrix
      vif_values <- diag(solve(cor_matrix))
      names(vif_values) <- colnames(X)
      return(vif_values)
    } else {
      return(c("Only one predictor" = NA))
    }
  }, error = function(e) {
    cat("Error calculating VIF for", service_name, ":\n")
    print(e)
    return(NULL)
  })
}

# Transform the data with skewness handling
data_transformed <- transform_skewed_data(data)

# Check for missing values in the density variable
cat("Missing values in density variable:", sum(is.na(data_transformed$Densitat..hab..km..)), "\n")
cat("Summary of density variable after transformation:\n")
print(summary(data_transformed$Densitat..hab..km..))

# Fit density models for each service
service_names <- unique(data_transformed$service)
cat("\n=== FITTING DENSITY MODELS (Density replaces Population variables) ===\n")
models_density <- setNames(
  lapply(service_names, function(x) safe_glmer_density(x, data_transformed)),
  service_names
)

# Remove NULL models
model_summaries <- models_density[!sapply(models_density, is.null)]

# Calculate VIF for each service
cat("\n=== VARIANCE INFLATION FACTORS (VIF) FOR DENSITY MODELS ===\n")
vif_results <- list()

for (service_name in names(model_summaries)) {
  cat("\n--- VIF for", service_name, "---\n")
  
  model <- model_summaries[[service_name]]
  if (!is.null(model)) {
    vif_values <- calculate_vif_glmer(model, service_name)
    
    if (!is.null(vif_values)) {
      vif_results[[service_name]] <- vif_values
      
      # Print VIF values
      print(round(vif_values, 3))
      
      # Flag high VIF values
      high_vif <- vif_values[vif_values > 5 & !is.na(vif_values)]
      if (length(high_vif) > 0) {
        cat("*** WARNING: High VIF detected (>5):\n")
        print(round(high_vif, 3))
      }
      
      very_high_vif <- vif_values[vif_values > 10 & !is.na(vif_values)]
      if (length(very_high_vif) > 0) {
        cat("*** SEVERE WARNING: Very high VIF detected (>10):\n")
        print(round(very_high_vif, 3))
      }
    }
  }
}

# Create summary table of VIF results
cat("\n=== VIF SUMMARY TABLE ===\n")
if (length(vif_results) > 0) {
  vif_df <- do.call(rbind, lapply(names(vif_results), function(service) {
    vif_vals <- vif_results[[service]]
    data.frame(
      Service = service,
      Variable = names(vif_vals),
      VIF = round(vif_vals, 3),
      stringsAsFactors = FALSE
    )
  }))
  
  print(vif_df)
  
  cat("\nVIF Summary Statistics:\n")
  cat("Mean VIF:", round(mean(vif_df$VIF, na.rm = TRUE), 3), "\n")
  cat("Max VIF:", round(max(vif_df$VIF, na.rm = TRUE), 3), "\n")
  cat("Variables with VIF > 5:", sum(vif_df$VIF > 5, na.rm = TRUE), "\n")
  cat("Variables with VIF > 10:", sum(vif_df$VIF > 10, na.rm = TRUE), "\n")
}

# Function to check singularity and convergence issues
check_model_issues <- function(models) {
  issues <- lapply(names(models), function(name) {
    model <- models[[name]]
    if (!is.null(model)) {
      singular <- isSingular(model)
      conv <- model@optinfo$conv$lme4$messages
      c(Singular = singular,
        Convergence = !is.null(conv),
        Messages = if(!is.null(conv)) paste(conv, collapse="; ") else "None")
    }
  })
  names(issues) <- names(models)
  return(do.call(rbind, issues))
}

# Get model diagnostics
model_issues <- check_model_issues(model_summaries)
cat("\n=== MODEL FITTING ISSUES ===\n")
print(model_issues)

# Create summary table
cat("\n=== DENSITY MODEL SUMMARY (Density replaces Population) ===\n")
modelsummary(
  model_summaries,
  stars = TRUE)

# Save results to Word document
modelsummary(
  model_summaries,
  stars = TRUE,
  output = "mixed_effects_density_model.docx"
)